- SET arch=%platform%
- IF "%platform%"=="x86" SET arch=Win32
- - cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=.
+ - cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -DENABLE_GTEST:BOOL=ON -DENABLE_BENCHMARK:BOOL=ON -DCMAKE_BUILD_TYPE=Release
- cmake --build build --config Release --target install
+ test_script:
+ - cd build
+ - ctest --verbose -C Release
after_build:
- 7z a OpenCC.zip build/bin build/include build/lib build/share
artifacts:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install flake8 pytest
+ pip install flake8 pytest wheel
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
CMakeLists.txt
*.cmake
*.pyc
+*.cmd
+*.tgz
+/.github
+/.vscode
+/.appveyor.yml
+/.clang-format
+/.travis.yml
+/Makefile
+/src/*Test.cpp
+/src/*TestBase.cpp
/doc
/data/scheme
+/deps/google-benchmark
+/deps/gtest-1.11.0
+/deps/tclap-1.2.2
/build
/debug
/release
/doc/html
/opencc.xcodeproj
/python
+/src/benchmark
/test/benchmark
/test/dict.ocd
/test/dict.txt
/test/dict.bin
+/test/CommandLineConvertTest.cpp
/node_modules
/xcode
Author:
-BYVoid <byvoid@byvoid.com>
+Carbo Kuo <byvoid@byvoid.com>
Contributors:
Peng Huang <shawn.p.huang@gmail.com>
#
# Open Chinese Convert
#
-# Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+# Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues)
set (OPENCC_VERSION_MAJOR 1)
set (OPENCC_VERSION_MINOR 1)
-set (OPENCC_VERSION_REVISION 0)
+set (OPENCC_VERSION_REVISION 1)
if (CMAKE_BUILD_TYPE MATCHES Debug)
set (version_suffix .Debug)
######## Mac OS X
-set(CMAKE_MACOSX_RPATH 1)
+# Only set the macOS rpath switch when the target system is Darwin. The
+# variable name is passed unexpanded so that if() dereferences it exactly once.
+if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+ set(CMAKE_MACOSX_RPATH 1)
+endif()
######## Directory
#
# Open Chinese Convert
#
-# Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+# Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
python-build:
cd python; python setup.py build
-python-install:
+python-install: python-build
cd python; python setup.py install
-python-develop:
- cd python; python setup.py develop
-
python-test: python-build
- cd python; python setup.py test
+ cd python; pytest .
test-all: test node-test python-test
format:
- find "src" "node" -iname "*.hpp" -o -iname "*.cpp" -o -iname "*.cc" \
+ find "src" "node" "test" -iname "*.hpp" -o -iname "*.cpp" -o -iname "*.cc" \
-o -iname "*.c" -o -iname "*.h" \
| xargs clang-format -i
clean:
- rm -rf build xcode
+ rm -rf build xcode python/opencc/clib
install: build
make -C build/rel install VERBOSE=${VERBOSE} PREFIX=${PREFIX}
# Change History of OpenCC
+## Version 1.1.1
+
+2020年5月22日
+
+* 正式提供[Python](https://pypi.org/project/OpenCC/)接口和TypeScript類型標註。
+* 更新動態鏈接庫`SOVERSION`到`1.1`,由於C++內部接口發生變更。
+* 進一步改進與Windows MSVC的兼容性。
+* 簡化頭文件結構,加快編譯速度。刪除不必要的`using`。
+* 修復部分香港標準字。
+
## Version 1.1.0
2020年5月10日
https://opencc.byvoid.com/
-### Command Line
-
-* `opencc --help`
-* `opencc_dict --help`
-* `opencc_phrase_extract --help`
-
### Node.js
+[npm](https://www.npmjs.com/opencc) `npm install opencc`
+
+#### JavaScript
```js
const OpenCC = require('opencc');
-const opencc = new OpenCC('s2t.json');
-opencc.convertPromise("汉字").then(converted => {
+const converter = new OpenCC('s2t.json');
+converter.convertPromise("汉字").then(converted => {
console.log(converted); // 漢字
});
```
-See [demo.js](https://github.com/BYVoid/OpenCC/blob/master/node/demo.js).
+#### TypeScript
+```ts
+import { OpenCC } from 'opencc';
+async function main() {
+ const converter: OpenCC = new OpenCC('s2t.json');
+ const result: string = await converter.convertPromise('汉字');
+ console.log(result);
+}
+```
+
+See [demo.js](https://github.com/BYVoid/OpenCC/blob/master/node/demo.js) and [ts-demo.ts](https://github.com/BYVoid/OpenCC/blob/master/node/ts-demo.ts).
+
+### Python
+
+[PyPI](https://pypi.org/project/OpenCC/) `pip install opencc` (Windows, Linux, Mac)
+
+```python
+import opencc
+converter = opencc.OpenCC('s2t.json')
+converter.convert('汉字') # 漢字
+```
+
+### C++
-### C++ Document 文檔
+```c++
+#include "opencc.h"
-https://byvoid.github.io/OpenCC/
+int main() {
+ const opencc::SimpleConverter converter("s2t.json");
+ converter.Convert("汉字"); // 漢字
+ return 0;
+}
+```
+
+Document 文檔: https://byvoid.github.io/OpenCC/
+
+### Command Line
+
+* `opencc --help`
+* `opencc_dict --help`
+* `opencc_phrase_extract --help`
### Others (Unofficial)
* Java: [opencc4j](https://github.com/houbb/opencc4j)
* Android: [android-opencc](https://github.com/qichuan/android-opencc)
* PHP: [opencc4php](https://github.com/nauxliu/opencc4php)
-* Python (Reimplementation): [opencc-python](https://github.com/yichen0831/opencc-python)
-* Python (C++ binding): [opencc-python](https://github.com/lepture/opencc-python)
* WebAssembly: [wasm-opencc](https://github.com/oyyd/wasm-opencc)
### Configurations 配置文件
### Build with CMake
-Linux (g++ 4.6 is required) and Mac OS X (clang 3.2 is required):
+#### Linux & Mac OS X
+
+g++ 4.6+ or clang 3.2+ is required.
```bash
make
```
-Windows Visual Studio:
+#### Windows Visual Studio:
```bash
-cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=.
-cmake --build build --config Release --target install
+build.cmd
```
### Test 測試
+#### Linux & Mac OS X
+
```
make test
```
+#### Windows Visual Studio:
+
+```bash
+test.cmd
+```
+
### Benchmark 基準測試
```
--- /dev/null
+cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=.
+cmake --build build --config Release --target install
蘊 藴
蛻 蜕
衆 眾
+衕 同
+衚 胡
衛 衞
覈 核
說 説
贗 贋
踊 踴
+蹟 跡
轀 輼
醞 醖
鉢 缽
修曼德 修曼德
修杰楷 修杰楷
修枝 修枝
-修桥舖路 修橋舖路
修桥补路 修橋補路
+修桥铺路 修橋鋪路
修樾 脩樾
修正 修正
修正为 修正爲
卷舌元音 捲舌元音
卷舌音 捲舌音
卷舒 卷舒
-卷舖盖 捲舖蓋
卷菸 捲菸
卷落叶 捲落葉
卷衣袖 捲衣袖
当罏红袖 當罏紅袖
当者披靡 當者披靡
当耳边风 當耳邊風
-当舖 當舖
当艄拿舵 當艄拿舵
当艄顺 當艄順
当花 當花
彩色缤纷 彩色繽紛
彩虹 彩虹
彩虹仙子 彩虹仙子
-彩虹冰舖 彩虹冰舖
+彩虹冰铺 彩虹冰鋪
彩虹桥 彩虹橋
彩蛋 彩蛋
彩蝶 彩蝶
药膏 藥膏
药膛 藥膛
药膳 藥膳
-药舖 藥舖
药茶 藥茶
药草 藥草
药草茶 藥草茶
變量 變數
軟件 軟體
軟驅 軟碟機
+轉義字符 跳脫字元
通信 通訊
通訊卡 通話卡
通配符 萬用字元
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>\r
+<!-- Generator: Adobe Illustrator 15.0.0, SVG Export Plug-In -->\r
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" [\r
+ <!ENTITY ns_flows "http://ns.adobe.com/Flows/1.0/">\r
+]>\r
+<svg version="1.1"\r
+ xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:a="http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/"\r
+ x="0px" y="0px" width="100px" height="100px" viewBox="-10 -10.48 100 100"\r
+ overflow="visible" enable-background="new -10 -10.48 100 100" xml:space="preserve">\r
+<defs>\r
+</defs>\r
+<g>\r
+ <path d="M47.2,24.959c0.746,0,2.346,0.64,4.8,1.92c2.026,1.12,3.04,2.16,3.04,3.12c0,0.428-0.56,0.88-1.68,1.36\r
+ c-2.4,1.174-4.641,2.56-6.721,4.16l-3.92,2.96c-0.427,0.268-0.8,0.507-1.12,0.72c0.693,0.427,1.093,1.014,1.2,1.759\r
+ c0.106,1.068,0.187,2.107,0.24,3.121c1.066-0.107,2.133-0.188,3.2-0.24c6.826-0.641,12.053-1.36,15.68-2.16\r
+ c1.546-0.32,2.48-0.48,2.8-0.48c0.32,0,1.761,0.48,4.32,1.44c2.08,1.067,3.12,2.054,3.12,2.96c0,0.854-1.44,1.279-4.32,1.279H55.76\r
+ c-3.308,0-7.467,0.16-12.479,0.48c0.319,4.48,0.479,8.48,0.479,12c0,6.027-0.587,10.267-1.76,12.721\r
+ c-1.067,1.706-2.054,2.56-2.96,2.56c-0.48,0-1.04-0.374-1.68-1.12c-2.668-3.414-5.094-6-7.28-7.76\r
+ c-0.48-0.267-0.72-0.533-0.72-0.801c0-0.212,0.106-0.319,0.32-0.319c0.692,0,1.786,0.427,3.28,1.28\r
+ c2.026,0.64,3.333,0.959,3.92,0.959c0.427,0,0.772-0.292,1.04-0.879c0.427-0.961,0.772-2.507,1.04-4.641\r
+ c0.267-2.452,0.4-5.732,0.4-9.84c0-1.387-0.028-2.666-0.08-3.84c-0.748,0.054-1.468,0.107-2.16,0.16\r
+ c-5.388,0.373-11.254,1.279-17.6,2.72c-1.654,0.427-2.748,0.64-3.28,0.64c-0.908,0-2.32-0.372-4.24-1.119\r
+ c-1.974-0.801-2.96-1.467-2.96-2c0-0.693,0.64-1.094,1.92-1.201c6.826-0.105,11.866-0.451,15.12-1.039\r
+ c4.052-0.533,8.346-1.013,12.88-1.44C38.8,43.493,38.612,42.693,38.4,42c-0.374-1.387-0.96-2.347-1.761-2.88\r
+ c-0.479-0.266-0.72-0.506-0.72-0.72c0-0.586,0.532-0.88,1.6-0.88c0.427,0,1.2,0.24,2.32,0.72c2.24-2.613,4.08-5.066,5.521-7.36\r
+ c0.372-0.586,0.56-0.986,0.56-1.2c0-0.426-0.294-0.64-0.88-0.64c-0.428,0-1.36,0.134-2.8,0.4C39.68,29.919,36,30.773,31.2,32\r
+ c-1.12,0.32-1.84,0.48-2.16,0.48c-0.48,0-1.308-0.292-2.48-0.88c-1.228-0.692-1.84-1.227-1.84-1.6c0-0.426,0.506-0.64,1.52-0.64\r
+ c4.16-0.266,7.706-0.772,10.64-1.52c2.88-0.64,5.732-1.412,8.561-2.32C46.347,25.147,46.933,24.959,47.2,24.959z M35.04,5.919\r
+ c1.12,0,2.532,0.454,4.24,1.36c1.706,1.068,2.826,2.108,3.359,3.12c0.533,1.12,0.801,2.24,0.801,3.36c0,1.173-0.32,2-0.96,2.48\r
+ c5.065-0.64,10.372-1.44,15.92-2.4c1.652-0.48,2.72-0.72,3.199-0.72c0.801,0,2.267,0.907,4.4,2.72\r
+ c2.452,1.974,3.68,3.468,3.68,4.48c0,1.12-1.04,1.76-3.12,1.92c-3.04,0.427-6.693,1.333-10.96,2.72\r
+ c-0.213,0-0.319-0.106-0.319-0.32c2.933-3.84,4.399-6.106,4.399-6.8c0-0.64-0.507-0.96-1.52-0.96\r
+ c-7.84,0.214-18.88,1.708-33.12,4.48c-1.228,0.107-1.974,0.16-2.24,0.16c-0.907,0-1.68-0.106-2.32-0.32\r
+ c-0.16,0.907-0.48,1.947-0.96,3.12c-0.854,1.974-1.654,3.334-2.4,4.08c-0.534,0.428-1.04,0.64-1.52,0.64\r
+ c-0.534,0-1.094-0.346-1.68-1.04c-0.32-0.586-0.48-1.28-0.48-2.08c0-0.746,0.16-1.386,0.48-1.92c1.066-1.333,2.32-4.132,3.76-8.4\r
+ c0.16-0.64,0.4-0.96,0.72-0.96c0.533,0,1.066,0.667,1.6,2c0.16,0.534,0.292,1.014,0.4,1.44h0.88c5.653-0.16,11.867-0.64,18.64-1.44\r
+ c-0.854-0.586-1.84-1.973-2.96-4.16c-1.707-3.36-2.56-5.44-2.56-6.24C34.4,6.08,34.612,5.973,35.04,5.919z"/>\r
+</g>\r
+</svg>\r
* @license
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
{
"variables": {
- "opencc_version": "1.1.0"
+ "opencc_version": "1.1.1"
},
"target_defaults": {
"defines": [
#include "DictConverter.cpp"
#include "DictEntry.cpp"
#include "DictGroup.cpp"
+#include "Lexicon.cpp"
#include "MarisaDict.cpp"
#include "MaxMatchSegmentation.cpp"
#include "Segmentation.cpp"
using namespace opencc;
-string ToUtf8String(const v8::Local<v8::Value>& val) {
+std::string ToUtf8String(const v8::Local<v8::Value>& val) {
Nan::Utf8String utf8(val);
- return string(*utf8);
+ return std::string(*utf8);
}
class OpenccBinding : public Nan::ObjectWrap {
struct ConvertRequest {
OpenccBinding* instance;
- string input;
- string output;
+ std::string input;
+ std::string output;
Nan::Callback* callback;
Optional<opencc::Exception> ex;
const ConverterPtr converter_;
public:
- explicit OpenccBinding(const string configFileName)
+ explicit OpenccBinding(const std::string configFileName)
: config_(), converter_(config_.NewFromFile(configFileName)) {}
virtual ~OpenccBinding() {}
- string Convert(const string& input) { return converter_->Convert(input); }
+ std::string Convert(const std::string& input) {
+ return converter_->Convert(input);
+ }
static NAN_METHOD(Version) {
info.GetReturnValue().Set(Nan::New<v8::String>(VERSION).ToLocalChecked());
try {
if (info.Length() >= 1 && info[0]->IsString()) {
- const string configFile = ToUtf8String(info[0]);
+ const std::string configFile = ToUtf8String(info[0]);
instance = new OpenccBinding(configFile);
} else {
instance = new OpenccBinding("s2t.json");
OpenccBinding* instance =
Nan::ObjectWrap::Unwrap<OpenccBinding>(info.This());
- const string input = ToUtf8String(info[0]);
- string output;
+ const std::string input = ToUtf8String(info[0]);
+ std::string output;
try {
output = instance->Convert(input);
} catch (opencc::Exception& e) {
Nan::ThrowTypeError("Wrong arguments");
return;
}
- const string inputFileName = ToUtf8String(info[0]);
- const string outputFileName = ToUtf8String(info[1]);
- const string formatFrom = ToUtf8String(info[2]);
- const string formatTo = ToUtf8String(info[3]);
+ const std::string inputFileName = ToUtf8String(info[0]);
+ const std::string outputFileName = ToUtf8String(info[1]);
+ const std::string formatFrom = ToUtf8String(info[2]);
+ const std::string formatTo = ToUtf8String(info[3]);
try {
opencc::ConvertDictionary(inputFileName, outputFileName, formatFrom,
formatTo);
--- /dev/null
+declare class OpenCC {
+ constructor(config: string);
+ version(): string;
+ generateDict(inputFileName: string, outputFileName: string, formatFrom: string, formatTo: string): void;
+ convert(input: string, callback: (err: string, convertedText: string) => void): string;
+ convertSync(input: string): string;
+ convertPromise(input: string): Promise<string>;
+}
+export { OpenCC };
* @license
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
this.handler = new binding.Opencc(config);
};
+// This is to support both CommonJS and ES module.
+OpenCC.OpenCC = OpenCC;
+
/**
* The version of OpenCC library.
*
* @param outputFileName Output dictionary filename.
* @param formatFrom Input dictionary format.
* @param formatTo Output dictionary format.
- * @return Converted text.
* @ingroup node_api
*/
OpenCC.generateDict = function (inputFileName, outputFileName,
--- /dev/null
+/**
+ * @file
+ * Example of Node.js API.
+ *
+ * @license
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { OpenCC } from './opencc';
+
+async function main() {
+ const converter: OpenCC = new OpenCC('s2t.json');
+ const result: string = await converter.convertPromise('汉字');
+ console.log(result);
+}
+
+main();
{
"name": "opencc",
- "version": "1.0.6",
+ "version": "1.1.1",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8="
},
"fsevents": {
- "version": "2.1.2",
- "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.2.tgz",
- "integrity": "sha512-R4wDiBwZ0KzpgOWetKDug1FZcYhqYnUYKtfZYt4mD5SBz76q0KR4Q9o7GIPamsVPGmW3EYPPJ0dOOjvx32ldZA==",
+ "version": "2.1.3",
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.1.3.tgz",
+ "integrity": "sha512-Auw9a4AxqWpa9GUfj370BMPzzyncfBABW8Mab7BGWBYDj4Isgq+cDKtx0i6u9jcX9pQDnswsaaOTgTmA5pEjuQ==",
"dev": true,
"optional": true
},
}
},
"mocha": {
- "version": "7.1.1",
- "resolved": "https://registry.npmjs.org/mocha/-/mocha-7.1.1.tgz",
- "integrity": "sha512-3qQsu3ijNS3GkWcccT5Zw0hf/rWvu1fTN9sPvEd81hlwsr30GX2GcDSSoBxo24IR8FelmrAydGC6/1J5QQP4WA==",
+ "version": "7.1.2",
+ "resolved": "https://registry.npmjs.org/mocha/-/mocha-7.1.2.tgz",
+ "integrity": "sha512-o96kdRKMKI3E8U0bjnfqW4QMk12MwZ4mhdBTf+B5a1q9+aq2HRnj+3ZdJu0B/ZhJeK78MgYuv6L8d/rA5AeBJA==",
"dev": true,
"requires": {
"ansi-colors": "3.2.3",
"js-yaml": "3.13.1",
"log-symbols": "3.0.0",
"minimatch": "3.0.4",
- "mkdirp": "0.5.3",
+ "mkdirp": "0.5.5",
"ms": "2.1.1",
"node-environment-flags": "1.0.6",
"object.assign": "4.1.0",
"yargs-unparser": "1.6.0"
},
"dependencies": {
- "debug": {
- "version": "3.2.6",
- "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz",
- "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==",
- "dev": true,
- "requires": {
- "ms": "^2.1.1"
- }
- },
"glob": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz",
"path-is-absolute": "^1.0.0"
}
},
- "mkdirp": {
- "version": "0.5.3",
- "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.3.tgz",
- "integrity": "sha512-P+2gwrFqx8lhew375MQHHeTlY8AuOJSrGf0R5ddkEndUkmwpgUob/vQuBD1V22/Cw1/lJr4x+EjllSezBThzBg==",
- "dev": true,
- "requires": {
- "minimist": "^1.2.5"
- }
- },
"ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"dev": true
},
"nan": {
- "version": "2.14.0",
- "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz",
- "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg=="
+ "version": "2.14.1",
+ "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.1.tgz",
+ "integrity": "sha512-isWHgVjnFjh2x2yuJ/tj3JbwoHu3UC2dX5G/88Cm24yB6YopVgxvBObDY7n5xW6ExmFhJpSEQqFPvq9zaXc8Jw=="
},
"needle": {
"version": "2.4.1",
{
"name": "opencc",
- "version": "1.1.0-1",
+ "version": "1.1.1",
"description": "Conversion between Traditional and Simplified Chinese",
- "author": "BYVoid <byvoid@byvoid.com>",
+ "author": "Carbo Kuo <byvoid@byvoid.com>",
"license": "Apache-2.0",
"main": "node/opencc.js",
+ "types": "node/opencc.d.ts",
"scripts": {
"test": "mocha -R spec node/test.js",
"deploy": "node-pre-gyp package && (node-pre-gyp-github publish --release || exit 0)",
"Traditional Chinese"
],
"devDependencies": {
- "mocha": "^7.1.1",
+ "mocha": "^7.1.2",
"node-pre-gyp-github": "^1.4.3"
},
"dependencies": {
- "nan": "^2.14.0",
+ "nan": "^2.14.1",
"node-pre-gyp": "^0.14.0"
}
}
# Pyre type checker
.pyre/
-
-# Generated files
-opencc/version.py
--- /dev/null
+version.py
+clib/
import platform
import sys
from ctypes import CDLL, c_char_p, c_size_t, c_void_p, cast
-from ctypes.util import find_library
try:
from opencc.version import __version__ # noqa
__all__ = ['CONFIGS', 'convert', 'OpenCC']
-_libcfile = find_library('c') or 'libc.so.6'
-libc = CDLL(_libcfile, use_errno=True)
-libc.free.argtypes = [c_void_p]
_thisdir = os.path.dirname(os.path.abspath(__file__))
_system = platform.system()
if _system == 'Darwin':
- _libopenccfilename = 'libopencc.2.dylib'
+ _libopenccfilename = 'libopencc.1.1.dylib'
elif _system == 'Linux':
- _libopenccfilename = 'libopencc.so.2'
+ _libopenccfilename = 'libopencc.so.1.1'
+elif _system == 'Windows':
+ _libopenccfilename = 'opencc.dll'
else:
raise NotImplementedError('Not tested for {}'.format(_system))
-_libopenccfile = os.path.join(_thisdir, 'clib', 'lib', _libopenccfilename)
+
+if _system == 'Windows':
+ _libopenccfile = os.path.join(_thisdir, 'clib', 'bin', _libopenccfilename)
+else:
+ _libopenccfile = os.path.join(_thisdir, 'clib', 'lib', _libopenccfilename)
libopencc = None
if os.path.isfile(_libopenccfile):
libopencc.opencc_open.restype = c_void_p
libopencc.opencc_convert_utf8.argtypes = [c_void_p, c_char_p, c_size_t]
libopencc.opencc_convert_utf8.restype = c_void_p
+ libopencc.opencc_convert_utf8_free.argtypes = [c_char_p]
+ libopencc.opencc_convert_utf8_free.restype = c_void_p
libopencc.opencc_close.argtypes = [c_void_p]
+ libopencc.opencc_error.argtypes = []
+ libopencc.opencc_error.restype = c_char_p
_opencc_share_dir = os.path.join(_thisdir, 'clib', 'share', 'opencc')
CONFIGS = []
if not os.path.isfile(config):
raise ValueError('Could not find file at {}'.format(config))
- self._od = libopencc.opencc_open(c_char_p(config.encode('utf-8')))
+ od = libopencc.opencc_open(c_char_p(config.encode('utf-8')))
+ if cast(od, c_void_p) == -1:
+ error = libopencc.opencc_error()
+ raise Exception(error.value)
+
+ self._od = od
def convert(self, text):
if isinstance(text, text_type):
retv_i = libopencc.opencc_convert_utf8(self._od, text, len(text))
if retv_i == -1:
- raise Exception('OpenCC Convert Error')
+ error = libopencc.opencc_error()
+ raise Exception(error.value)
+
retv_c = cast(retv_i, c_char_p)
value = retv_c.value
- libc.free(retv_c)
+ libopencc.opencc_convert_utf8_free(retv_c)
return value.decode('utf-8')
def __del__(self):
- libopencc.opencc_close(self._od)
+ if hasattr(self, '_od'):
+ libopencc.opencc_close(self._od)
import re
import subprocess
import sys
+import warnings
import setuptools
import setuptools.command.build_py
-import setuptools.command.develop
-import setuptools.command.install
-import setuptools.command.test
+import wheel.bdist_wheel
from opencc import _libopenccfile
version_info[1] = match.group(2)
elif match.group(1) == 'REVISION':
version_info[2] = match.group(2)
- return '.'.join(version_info)
+ version = '.'.join(version_info)
+ return version
def write_version_file(version_info):
if os.path.isfile(_libopenccfile):
return # Skip building binary file
- print('building libopencc')
- assert subprocess.call('command -v make', shell=True) == 0, \
- 'Build requires `make`'
- assert subprocess.call('command -v cmake', shell=True) == 0, \
- 'Build requires `cmake`'
- # Probably also needs to check for cpp-compilier
-
- errno = subprocess.call((
- 'mkdir -p {build_dir};'
- 'cmake '
- '-B {build_dir} '
- '-DBUILD_DOCUMENTATION:BOOL=OFF '
- '-DENABLE_GTEST:BOOL=OFF '
- '-DCMAKE_BUILD_TYPE=Release '
- '-DCMAKE_INSTALL_PREFIX={clib_dir} '
- '..;'
- 'make -C {build_dir} -j;'
- 'make -C {build_dir} install;'
- ).format(
- build_dir=_build_dir,
- clib_dir=_clib_dir
- ), shell=True)
-
- assert errno == 0, 'Build failed'
+ print('building libopencc into %s' % _build_dir)
+
+ def build_on_windows():
+ subprocess.call('md %s' % _build_dir, shell=True)
+ cmd = (
+ 'cmake '
+ '-B {build_dir} '
+ '-DBUILD_DOCUMENTATION:BOOL=OFF '
+ '-DENABLE_GTEST:BOOL=OFF '
+ '-DCMAKE_BUILD_TYPE=Release '
+ '-DCMAKE_INSTALL_PREFIX={clib_dir} '
+ '..'
+ ).format(
+ build_dir=_build_dir,
+ clib_dir=_clib_dir
+ )
+ errno = subprocess.call(cmd, shell=True)
+ assert errno == 0, 'Configure failed'
+ cmd = (
+ 'cmake --build {build_dir} --config Release --target install'
+ ).format(
+ build_dir=_build_dir
+ )
+ errno = subprocess.call(cmd, shell=True)
+ assert errno == 0, 'Build failed'
+
+ def build_on_posix():
+ assert subprocess.call('command -v make', shell=True) == 0, \
+ 'Build requires `make`'
+ assert subprocess.call('command -v cmake', shell=True) == 0, \
+ 'Build requires `cmake`'
+ # Probably also needs to check for a C++ compiler
+
+ errno = subprocess.call((
+ 'mkdir -p {build_dir};'
+ 'cmake '
+ '-B {build_dir} '
+ '-DBUILD_DOCUMENTATION:BOOL=OFF '
+ '-DENABLE_GTEST:BOOL=OFF '
+ '-DCMAKE_BUILD_TYPE=Release '
+ '-DCMAKE_INSTALL_PREFIX={clib_dir} '
+ '..;'
+ 'make -C {build_dir} -j;'
+ 'make -C {build_dir} install;'
+ ).format(
+ build_dir=_build_dir,
+ clib_dir=_clib_dir
+ ), shell=True)
+
+ assert errno == 0, 'Build failed'
+
+ if sys.platform == 'win32':
+ build_on_windows()
+ else:
+ build_on_posix()
assert os.path.isfile(_libopenccfile)
super(BuildPyCommand, self).run()
-class InstallCommand(setuptools.command.install.install, object):
- def run(self):
- build_libopencc()
- super(InstallCommand, self).run()
+class BDistWheelCommand(wheel.bdist_wheel.bdist_wheel, object):
+ """Custom bdist_wheel command that will change
+ default plat-name based on PEP 425 and PEP 513
+ """
+ @staticmethod
+ def _determine_platform_tag():
+ if sys.platform == 'win32':
+ if 'amd64' in sys.version.lower():
+ return 'win-amd64'
+ return sys.platform
-class DevelopCommand(setuptools.command.develop.develop, object):
- def run(self):
- build_libopencc()
- super(DevelopCommand, self).run()
+ if sys.platform == 'darwin':
+ _, _, _, _, machine = os.uname()
+ return 'macosx-10.9-{}'.format(machine)
+
+ if os.name == 'posix':
+ _, _, _, _, machine = os.uname()
+ return 'manylinux1-{}'.format(machine)
+ warnings.warn(
+ 'Windows macos and linux are all not detected, '
+ 'Proper distribution name cannot be determined.')
+ from distutils.util import get_platform
+ return get_platform()
-class PyTestCommand(setuptools.command.test.test):
- def run_tests(self):
- import pytest
- errno = pytest.main([])
- sys.exit(errno)
+ def initialize_options(self):
+ super(BDistWheelCommand, self).initialize_options()
+ self.plat_name = self._determine_platform_tag()
version_info = get_version_info()
author_info = get_author_info()
setuptools.setup(
- name='opencc-py',
+ name='OpenCC',
version=version_info,
author=author_info[0],
author_email=author_info[1],
packages=['opencc'],
package_data={str('opencc'): [
+ 'clib/bin/*.dll',
'clib/include/opencc/*',
'clib/lib/libopencc.*',
'clib/share/opencc/*',
]},
cmdclass={
'build_py': BuildPyCommand,
- 'install': InstallCommand,
- 'develop': DevelopCommand,
- 'test': PyTestCommand,
+ 'bdist_wheel': BDistWheelCommand
},
- tests_require=['pytest'],
- test_suite='tests',
-
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Topic :: Software Development :: Localization',
],
license='Apache License 2.0',
- keywords='opencc convert chinese'
+ keywords=['opencc', 'convert', 'chinese']
)
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+
#include "BinaryDict.hpp"
#include "Lexicon.hpp"
}
void BinaryDict::SerializeToFile(FILE* fp) const {
- string keyBuf, valueBuf;
- vector<size_t> keyOffsets, valueOffsets;
+ std::string keyBuf, valueBuf;
+ std::vector<size_t> keyOffsets, valueOffsets;
size_t keyTotalLength = 0, valueTotalLength = 0;
ConstructBuffer(keyBuf, keyOffsets, keyTotalLength, valueBuf, valueOffsets,
valueTotalLength);
}
std::string key = dict->keyBuffer.c_str() + keyOffset;
// Value offset
- vector<std::string> values;
+ std::vector<std::string> values;
for (size_t j = 0; j < numValues; j++) {
size_t valueOffset;
unitsRead = fread(&valueOffset, sizeof(size_t), 1, fp);
return dict;
}
-void BinaryDict::ConstructBuffer(string& keyBuf, vector<size_t>& keyOffset,
- size_t& keyTotalLength, string& valueBuf,
- vector<size_t>& valueOffset,
+void BinaryDict::ConstructBuffer(std::string& keyBuf,
+ std::vector<size_t>& keyOffset,
+ size_t& keyTotalLength, std::string& valueBuf,
+ std::vector<size_t>& valueOffset,
size_t& valueTotalLength) const {
keyTotalLength = 0;
valueTotalLength = 0;
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
private:
LexiconPtr lexicon;
- string keyBuffer;
- string valueBuffer;
+ std::string keyBuffer;
+ std::string valueBuffer;
- void ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
- size_t& keyTotalLength, string& valueBuffer,
- vector<size_t>& valueOffset,
+ void ConstructBuffer(std::string& keyBuffer, std::vector<size_t>& keyOffset,
+ size_t& keyTotalLength, std::string& valueBuffer,
+ std::vector<size_t>& valueOffset,
size_t& valueTotalLength) const;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2015-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
: binDict(new BinaryDict(textDict->GetLexicon())), fileName("dict.bin"){};
const BinaryDictPtr binDict;
- const string fileName;
+ const std::string fileName;
};
TEST_F(BinaryDictTest, Serialization) {
DictConverter.cpp
DictEntry.cpp
DictGroup.cpp
+ Lexicon.cpp
MarisaDict.cpp
MaxMatchSegmentation.cpp
PhraseExtract.cpp
OUTPUT_NAME
opencc
VERSION
- 1.0.0
+ 1.1.1
SOVERSION
- 2
+ 1.1
)
# Installation
# Gtest
if (ENABLE_GTEST)
+ if (WIN32)
+ # Copy the gtest binaries into ${CMAKE_CURRENT_BINARY_DIR}, where the test
+ # executables are built. NOTE(review): presumably so the tests can locate
+ # them at run time on Windows -- confirm.
+ add_custom_target(
+ copy_gtest_to_src
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest> "${CMAKE_CURRENT_BINARY_DIR}"
+ COMMENT "Copy gtest"
+ VERBATIM
+ )
+ add_custom_target(
+ copy_gtest_main_to_src
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest_main> "${CMAKE_CURRENT_BINARY_DIR}"
+ COMMENT "Copy gtest_main"
+ VERBATIM
+ )
+ endif()
+
foreach(TESTCASE ${UNITTESTS})
add_executable(${TESTCASE} ${TESTCASE}.cpp)
target_link_libraries(${TESTCASE} gtest gtest_main libopencc)
add_test(${TESTCASE} ${TESTCASE})
+ if (WIN32)
+ # Make sure the copy targets run before each test executable is built.
+ add_dependencies(${TESTCASE} copy_gtest_to_src copy_gtest_main_to_src)
+ endif()
endforeach()
endif()
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820)
#endif
-#include <algorithm>
-#include <fstream>
-#include <functional>
-#include <iostream>
-#include <list>
-#include <map>
+#include <cstddef>
#include <memory>
-#include <sstream>
#include <string>
#include <vector>
-#include <cassert>
-#include <cstddef>
-#include <cstdio>
-#include <cstring>
-#include <ctime>
-
-#include "Exception.hpp"
#include "Export.hpp"
#include "Optional.hpp"
-using std::list;
-using std::string;
-using std::vector;
-
// Forward declarations and aliases
namespace opencc {
class Config;
} // namespace opencc
#ifndef PKGDATADIR
-const string PACKAGE_DATA_DIRECTORY = "";
+const std::string PACKAGE_DATA_DIRECTORY = "";
#else // ifndef PKGDATADIR
-const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
+const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
#endif // ifndef PKGDATADIR
#ifndef VERSION
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <fstream>
+#include <list>
#include <unordered_map>
#include "document.h"
#include "ConversionChain.hpp"
#include "Converter.hpp"
#include "DictGroup.hpp"
+#include "Exception.hpp"
#include "MarisaDict.hpp"
#include "MaxMatchSegmentation.hpp"
#include "TextDict.hpp"
class ConfigInternal {
public:
- string configDirectory;
+ std::string configDirectory;
std::unordered_map<
- string, std::unordered_map<string, std::unordered_map<string, DictPtr>>>
+ std::string,
+ std::unordered_map<std::string, std::unordered_map<std::string, DictPtr>>>
dictCache;
const JSONValue& GetProperty(const JSONValue& doc, const char* name) {
if (!doc.HasMember(name)) {
- throw InvalidFormat("Required property not found: " + string(name));
+ throw InvalidFormat("Required property not found: " + std::string(name));
}
return doc[name];
}
const JSONValue& GetObjectProperty(const JSONValue& doc, const char* name) {
const JSONValue& obj = GetProperty(doc, name);
if (!obj.IsObject()) {
- throw InvalidFormat("Property must be an object: " + string(name));
+ throw InvalidFormat("Property must be an object: " + std::string(name));
}
return obj;
}
const JSONValue& GetArrayProperty(const JSONValue& doc, const char* name) {
const JSONValue& obj = GetProperty(doc, name);
if (!obj.IsArray()) {
- throw InvalidFormat("Property must be an array: " + string(name));
+ throw InvalidFormat("Property must be an array: " + std::string(name));
}
return obj;
}
const char* GetStringProperty(const JSONValue& doc, const char* name) {
const JSONValue& obj = GetProperty(doc, name);
if (!obj.IsString()) {
- throw InvalidFormat("Property must be a string: " + string(name));
+ // The message describes the JSON value type, so it keeps saying "string";
+ // only the C++ type name needs the std:: qualifier.
+ throw InvalidFormat("Property must be a string: " + std::string(name));
}
return obj.GetString();
}
- template <typename DICT> DictPtr LoadDictWithPaths(const string& fileName) {
+ template <typename DICT>
+ DictPtr LoadDictWithPaths(const std::string& fileName) {
// Working directory
std::shared_ptr<DICT> dict;
if (SerializableDict::TryLoadFromFile<DICT>(fileName, &dict)) {
throw FileNotFound(fileName);
}
- DictPtr LoadDictFromFile(const string& type, const string& fileName) {
+ DictPtr LoadDictFromFile(const std::string& type,
+ const std::string& fileName) {
if (type == "text") {
return LoadDictWithPaths<TextDict>(fileName);
}
DictPtr ParseDict(const JSONValue& doc) {
// Required: type
- string type = GetStringProperty(doc, "type");
+ std::string type = GetStringProperty(doc, "type");
if (type == "group") {
- list<DictPtr> dicts;
+ std::list<DictPtr> dicts;
const JSONValue& docs = GetArrayProperty(doc, "dicts");
for (rapidjson::SizeType i = 0; i < docs.Size(); i++) {
if (docs[i].IsObject()) {
}
return DictGroupPtr(new DictGroup(dicts));
} else {
- string fileName = GetStringProperty(doc, "file");
+ std::string fileName = GetStringProperty(doc, "file");
// Read from cache
DictPtr& cache = dictCache[type][configDirectory][fileName];
if (cache != nullptr) {
SegmentationPtr segmentation;
// Required: type
- string type = GetStringProperty(doc, "type");
+ std::string type = GetStringProperty(doc, "type");
if (type == "mmseg") {
// Required: dict
DictPtr dict = ParseDict(GetObjectProperty(doc, "dict"));
}
ConversionChainPtr ParseConversionChain(const JSONValue& docs) {
- list<ConversionPtr> conversions;
+ std::list<ConversionPtr> conversions;
for (rapidjson::SizeType i = 0; i < docs.Size(); i++) {
const JSONValue& doc = docs[i];
if (doc.IsObject()) {
return chain;
}
- string FindConfigFile(string fileName) {
+ std::string FindConfigFile(std::string fileName) {
std::ifstream ifs;
// Working directory
}
// Package data directory
if (PACKAGE_DATA_DIRECTORY != "") {
- string prefixedFileName = PACKAGE_DATA_DIRECTORY + fileName;
+ std::string prefixedFileName = PACKAGE_DATA_DIRECTORY + fileName;
ifs.open(UTF8Util::GetPlatformString(prefixedFileName).c_str());
if (ifs.is_open()) {
return prefixedFileName;
Config::~Config() { delete (ConfigInternal*)internal; }
-ConverterPtr Config::NewFromFile(const string& fileName) {
+ConverterPtr Config::NewFromFile(const std::string& fileName) {
ConfigInternal* impl = (ConfigInternal*)internal;
- string prefixedFileName = impl->FindConfigFile(fileName);
+ std::string prefixedFileName = impl->FindConfigFile(fileName);
std::ifstream ifs(UTF8Util::GetPlatformString(prefixedFileName));
- string content(std::istreambuf_iterator<char>(ifs),
- (std::istreambuf_iterator<char>()));
+ std::string content(std::istreambuf_iterator<char>(ifs),
+ (std::istreambuf_iterator<char>()));
#if defined(_WIN32) || defined(_WIN64)
UTF8Util::ReplaceAll(prefixedFileName, "\\", "/");
#endif // if defined(_WIN32) || defined(_WIN64)
size_t slashPos = prefixedFileName.rfind("/");
- string configDirectory = "";
- if (slashPos != string::npos) {
+ std::string configDirectory = "";
+ if (slashPos != std::string::npos) {
configDirectory = prefixedFileName.substr(0, slashPos) + "/";
}
return NewFromString(content, configDirectory);
}
-ConverterPtr Config::NewFromString(const string& json,
- const string& configDirectory) {
+ConverterPtr Config::NewFromString(const std::string& json,
+ const std::string& configDirectory) {
rapidjson::Document doc;
doc.ParseInsitu<0>(const_cast<char*>(json.c_str()));
}
// Optional: name
- string name;
+ std::string name;
if (doc.HasMember("name") && doc["name"].IsString()) {
name = doc["name"].GetString();
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
virtual ~Config();
- ConverterPtr NewFromString(const string& json, const string& configDirectory);
+ ConverterPtr NewFromString(const std::string& json,
+ const std::string& configDirectory);
- ConverterPtr NewFromFile(const string& fileName);
+ ConverterPtr NewFromFile(const std::string& fileName);
private:
void* internal;
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <fstream>
+
#include "Config.hpp"
#include "ConfigTestBase.hpp"
#include "Converter.hpp"
+#include "Exception.hpp"
#include "TestUtilsUTF8.hpp"
namespace opencc {
Config config;
ConverterPtr converter;
- const string input;
- const string expected;
+ const std::string input;
+ const std::string expected;
};
TEST_F(ConfigTest, Convert) {
- const string& converted = converter->Convert(input);
+ const std::string& converted = converter->Convert(input);
EXPECT_EQ(expected, converted);
}
}
TEST_F(ConfigTest, NonexistingPath) {
- const string path = "/opencc/no/such/file/or/directory";
+ const std::string path = "/opencc/no/such/file/or/directory";
try {
- const ConverterPtr converter = config.NewFromFile(path);
+ const ConverterPtr _ = config.NewFromFile(path);
} catch (FileNotFound& e) {
EXPECT_EQ(path + " not found or not accessible.", e.what());
}
TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) {
std::ifstream ifs(CONFIG_TEST_PATH);
- string content(std::istreambuf_iterator<char>(ifs),
- (std::istreambuf_iterator<char>()));
- string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test";
+ std::string content(std::istreambuf_iterator<char>(ifs),
+ (std::istreambuf_iterator<char>()));
+ std::string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test";
- const ConverterPtr converter =
+ const ConverterPtr _ =
config.NewFromString(content, pathWithoutTrailingSlash);
}
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
: CONFIG_TEST_PATH(CMAKE_SOURCE_DIR
"/test/config_test/config_test.json") {}
- const string CONFIG_TEST_PATH;
+ const std::string CONFIG_TEST_PATH;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
using namespace opencc;
-string Conversion::Convert(const char* phrase) const {
+std::string Conversion::Convert(const char* phrase) const {
std::ostringstream buffer;
for (const char* pstr = phrase; *pstr != '\0';) {
Optional<const DictEntry*> matched = dict->MatchPrefix(pstr);
return buffer.str();
}
-string Conversion::Convert(const string& phrase) const {
+std::string Conversion::Convert(const std::string& phrase) const {
return Convert(phrase.c_str());
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Conversion(DictPtr _dict) : dict(_dict) {}
// Convert single phrase
- string Convert(const string& phrase) const;
+ std::string Convert(const std::string& phrase) const;
// Convert single phrase
- string Convert(const char* phrase) const;
+ std::string Convert(const char* phrase) const;
// Convert segmented text
SegmentsPtr Convert(const SegmentsPtr& input) const;
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <list>
+
#include "ConversionChain.hpp"
#include "Segments.hpp"
using namespace opencc;
-ConversionChain::ConversionChain(const list<ConversionPtr> _conversions)
+ConversionChain::ConversionChain(const std::list<ConversionPtr> _conversions)
: conversions(_conversions) {}
SegmentsPtr ConversionChain::Convert(const SegmentsPtr& input) const {
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma once
+#include <list>
+
#include "Common.hpp"
#include "Conversion.hpp"
*/
class OPENCC_EXPORT ConversionChain {
public:
- ConversionChain(const list<ConversionPtr> _conversions);
+ ConversionChain(const std::list<ConversionPtr> _conversions);
SegmentsPtr Convert(const SegmentsPtr& input) const;
- const list<ConversionPtr> GetConversions() const { return conversions; }
+ const std::list<ConversionPtr> GetConversions() const { return conversions; }
private:
- const list<ConversionPtr> conversions;
+ const std::list<ConversionPtr> conversions;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
const size_t length = expected->Length();
EXPECT_TRUE(length == actual->Length());
for (size_t i = 0; i < length; i++) {
- EXPECT_EQ(string(expected->At(i)), string(actual->At(i)));
+ EXPECT_EQ(std::string(expected->At(i)), std::string(actual->At(i)));
}
}
const DictPtr& dictVariants = CreateDictForTaiwanVariants();
const ConversionPtr& conversionVariants =
ConversionPtr(new Conversion(dictVariants));
- const list<ConversionPtr> conversions{conversion, conversionVariants};
+ const std::list<ConversionPtr> conversions{conversion, conversionVariants};
const ConversionChainPtr& conversionChain =
ConversionChainPtr(new ConversionChain(conversions));
const SegmentsPtr& converted =
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
DictPtr dict;
ConversionPtr conversion;
- const string input;
- const string expected;
+ const std::string input;
+ const std::string expected;
};
TEST_F(ConversionTest, ConvertString) {
- const string converted = conversion->Convert(input);
+ const std::string converted = conversion->Convert(input);
EXPECT_EQ(expected, converted);
}
TEST_F(ConversionTest, ConvertCString) {
- const string converted = conversion->Convert(input.c_str());
+ const std::string converted = conversion->Convert(input.c_str());
EXPECT_EQ(expected, converted);
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "Converter.hpp"
+#include <cstring>
+
#include "ConversionChain.hpp"
+#include "Converter.hpp"
#include "Segments.hpp"
using namespace opencc;
-string Converter::Convert(const string& text) const {
+std::string Converter::Convert(const std::string& text) const {
const SegmentsPtr& segments = segmentation->Segment(text);
const SegmentsPtr& converted = conversionChain->Convert(segments);
return converted->ToString();
}
+// Converts the NUL-terminated `input` and copies the result, including its
+// terminating NUL, into `output` with strcpy. No bounds check is performed
+// here, so the caller must supply a buffer large enough for the converted
+// text. Returns the length of the converted text as reported by
+// std::string::length().
size_t Converter::Convert(const char* input, char* output) const {
- const string& converted = Convert(input);
+ const std::string& converted = Convert(input);
strcpy(output, converted.c_str());
return converted.length();
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
class OPENCC_EXPORT Converter {
public:
- Converter(const string& _name, SegmentationPtr _segmentation,
+ Converter(const std::string& _name, SegmentationPtr _segmentation,
ConversionChainPtr _conversionChain)
: name(_name), segmentation(_segmentation),
conversionChain(_conversionChain) {}
- string Convert(const string& text) const;
+ std::string Convert(const std::string& text) const;
size_t Convert(const char* input, char* output) const;
}
private:
- const string name;
+ const std::string name;
const SegmentationPtr segmentation;
const ConversionChainPtr conversionChain;
};
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "DartsDict.hpp"
+#include <algorithm>
+#include <cstring>
+
#include "BinaryDict.hpp"
+#include "DartsDict.hpp"
#include "Lexicon.hpp"
#include "darts.h"
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
fileName("dict.ocd"){};
const DartsDictPtr dartsDict;
- const string fileName;
+ const std::string fileName;
};
TEST_F(DartsDictTest, DictTest) { TestDict(dartsDict); }
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <algorithm>
+
#include "Dict.hpp"
using namespace opencc;
Optional<const DictEntry*> Dict::MatchPrefix(const char* word,
- size_t len) const {
- string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength());
- const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length();
- for (long len = static_cast<long>(wordTrunc.length()); len > 0;) {
+ size_t wordLen) const {
+ long len = static_cast<long>((std::min)(KeyMaxLength(), wordLen));
+ std::string wordTrunc = UTF8Util::TruncateUTF8(word, len);
+ const char* wordTruncPtr = wordTrunc.c_str() + len;
+ for (; len > 0;) {
wordTrunc.resize(static_cast<size_t>(len));
wordTruncPtr = wordTrunc.c_str() + len;
const Optional<const DictEntry*>& result = Match(wordTrunc.c_str());
return Optional<const DictEntry*>::Null();
}
-vector<const DictEntry*> Dict::MatchAllPrefixes(const char* word,
- size_t len) const {
- vector<const DictEntry*> matchedLengths;
- string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength());
- const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length();
- for (long len = static_cast<long>(wordTrunc.length()); len > 0;
+std::vector<const DictEntry*> Dict::MatchAllPrefixes(const char* word,
+ size_t wordLen) const {
+ std::vector<const DictEntry*> matchedLengths;
+ long len = static_cast<long>((std::min)(KeyMaxLength(), wordLen));
+ std::string wordTrunc = UTF8Util::TruncateUTF8(word, len);
+ const char* wordTruncPtr = wordTrunc.c_str() + len;
+ for (; len > 0;
len -= static_cast<long>(UTF8Util::PrevCharLength(wordTruncPtr))) {
wordTrunc.resize(static_cast<size_t>(len));
wordTruncPtr = wordTrunc.c_str() + len;
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/**
* Matches a word exactly and returns the DictEntry or Optional::Null().
*/
- Optional<const DictEntry*> Match(const string& word) const {
+ Optional<const DictEntry*> Match(const std::string& word) const {
return Match(word.c_str(), word.length());
}
/**
* Matches the longest matched prefix of a word.
*/
- Optional<const DictEntry*> MatchPrefix(const string& word) const {
+ Optional<const DictEntry*> MatchPrefix(const std::string& word) const {
return MatchPrefix(word.c_str(), word.length());
}
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
* all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
*/
- virtual vector<const DictEntry*> MatchAllPrefixes(const char* word,
- size_t len) const;
+ virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
+ size_t len) const;
/**
* Returns all matched prefixes of a word, sorted by the length (desc).
*/
- vector<const DictEntry*> MatchAllPrefixes(const string& word) const {
+ std::vector<const DictEntry*>
+ MatchAllPrefixes(const std::string& word) const {
return MatchAllPrefixes(word.c_str(), word.length());
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
using namespace opencc;
-DictPtr LoadDictionary(const string& format, const string& inputFileName) {
+DictPtr LoadDictionary(const std::string& format,
+ const std::string& inputFileName) {
if (format == "text") {
return SerializableDict::NewFromFile<TextDict>(inputFileName);
} else if (format == "ocd") {
return nullptr;
}
-SerializableDictPtr ConvertDict(const string& format, const DictPtr dict) {
+SerializableDictPtr ConvertDict(const std::string& format, const DictPtr dict) {
if (format == "text") {
return TextDict::NewFromDict(*dict.get());
} else if (format == "ocd") {
}
namespace opencc {
-void ConvertDictionary(const string inputFileName, const string outputFileName,
- const string formatFrom, const string formatTo) {
+void ConvertDictionary(const std::string inputFileName,
+ const std::string outputFileName,
+ const std::string formatFrom,
+ const std::string formatTo) {
DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
SerializableDictPtr dictTo = ConvertDict(formatTo, dictFrom);
dictTo->SerializeToFile(outputFileName);
/*
* Open Chinese Convert
*
- * Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2017 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* Converts a dictionary from a format to another.
* @ingroup opencc_cpp_api
*/
-OPENCC_EXPORT void ConvertDictionary(const string inputFileName,
- const string outputFileName,
- const string formatFrom,
- const string formatTo);
+OPENCC_EXPORT void ConvertDictionary(const std::string inputFileName,
+ const std::string outputFileName,
+ const std::string formatFrom,
+ const std::string formatTo);
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
using namespace opencc;
-string MultiValueDictEntry::ToString() const {
+std::string MultiValueDictEntry::ToString() const {
// TODO escape space
size_t i = 0;
size_t length = Values().size();
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
virtual std::string Key() const = 0;
- virtual vector<std::string> Values() const = 0;
+ virtual std::vector<std::string> Values() const = 0;
virtual std::string GetDefault() const = 0;
virtual size_t NumValues() const = 0;
- virtual string ToString() const = 0;
+ virtual std::string ToString() const = 0;
size_t KeyLength() const { return Key().length(); }
class OPENCC_EXPORT NoValueDictEntry : public DictEntry {
public:
- NoValueDictEntry(const string& _key) : key(_key) {}
+ NoValueDictEntry(const std::string& _key) : key(_key) {}
virtual ~NoValueDictEntry() {}
virtual std::string Key() const { return key; }
- virtual vector<std::string> Values() const { return vector<std::string>(); }
+ virtual std::vector<std::string> Values() const {
+ return std::vector<std::string>();
+ }
virtual std::string GetDefault() const { return key; }
virtual size_t NumValues() const { return 0; }
- virtual string ToString() const { return key; }
+ virtual std::string ToString() const { return key; }
private:
- string key;
+ std::string key;
};
class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
public:
virtual std::string Value() const = 0;
- virtual vector<std::string> Values() const {
- return vector<std::string>{Value()};
+ virtual std::vector<std::string> Values() const {
+ return std::vector<std::string>{Value()};
}
virtual std::string GetDefault() const { return Value(); }
virtual size_t NumValues() const { return 1; }
- virtual string ToString() const { return string(Key()) + "\t" + Value(); }
+ virtual std::string ToString() const {
+ return std::string(Key()) + "\t" + Value();
+ }
};
class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry {
public:
- StrSingleValueDictEntry(const string& _key, const string& _value)
+ StrSingleValueDictEntry(const std::string& _key, const std::string& _value)
: key(_key), value(_value) {}
virtual ~StrSingleValueDictEntry() {}
virtual std::string Value() const { return value; }
private:
- string key;
- string value;
+ std::string key;
+ std::string value;
};
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
}
}
- virtual string ToString() const;
+ virtual std::string ToString() const;
};
class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
public:
- StrMultiValueDictEntry(const string& _key, const vector<std::string>& _values)
+ StrMultiValueDictEntry(const std::string& _key,
+ const std::vector<std::string>& _values)
: key(_key), values(_values) {}
virtual ~StrMultiValueDictEntry() {}
size_t NumValues() const { return values.size(); }
- vector<std::string> Values() const { return values; }
+ std::vector<std::string> Values() const { return values; }
private:
- string key;
- vector<string> values;
+ std::string key;
+ std::vector<std::string> values;
};
class OPENCC_EXPORT DictEntryFactory {
public:
- static DictEntry* New(const string& key) { return new NoValueDictEntry(key); }
+ static DictEntry* New(const std::string& key) {
+ return new NoValueDictEntry(key);
+ }
- static DictEntry* New(const string& key, const string& value) {
+ static DictEntry* New(const std::string& key, const std::string& value) {
return new StrSingleValueDictEntry(key, value);
}
- static DictEntry* New(const string& key, const vector<string>& values) {
+ static DictEntry* New(const std::string& key,
+ const std::vector<std::string>& values) {
if (values.size() == 0) {
return New(key);
} else if (values.size() == 1) {
if (entry->NumValues() == 0) {
return new NoValueDictEntry(entry->Key());
} else if (entry->NumValues() == 1) {
- const auto svEntry = static_cast<const SingleValueDictEntry*>(entry);
- return new StrSingleValueDictEntry(svEntry->Key(), svEntry->Value());
+ return new StrSingleValueDictEntry(entry->Key(), entry->Values().front());
} else {
- const auto mvEntry = static_cast<const MultiValueDictEntry*>(entry);
- return new StrMultiValueDictEntry(mvEntry->Key(), mvEntry->Values());
+ return new StrMultiValueDictEntry(entry->Key(), entry->Values());
}
}
};
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <map>
+
#include "DictGroup.hpp"
#include "Lexicon.hpp"
#include "TextDict.hpp"
using namespace opencc;
-DictGroup::DictGroup(const list<DictPtr>& _dicts)
+DictGroup::DictGroup(const std::list<DictPtr>& _dicts)
: keyMaxLength(0), dicts(_dicts) {}
DictGroup::~DictGroup() {}
return Optional<const DictEntry*>::Null();
}
-vector<const DictEntry*> DictGroup::MatchAllPrefixes(const char* word,
- size_t len) const {
+std::vector<const DictEntry*> DictGroup::MatchAllPrefixes(const char* word,
+ size_t len) const {
std::map<size_t, const DictEntry*> matched;
// Match all prefixes from all dictionaries
for (const auto& dict : dicts) {
- const vector<const DictEntry*>& entries = dict->MatchAllPrefixes(word, len);
+ const std::vector<const DictEntry*>& entries =
+ dict->MatchAllPrefixes(word, len);
for (const auto& entry : entries) {
- size_t len = entry->KeyLength();
+ size_t entryLen = entry->KeyLength();
// If the current length has already result, skip
- if (matched.find(len) == matched.end()) {
- matched[len] = entry;
+ if (matched.find(entryLen) == matched.end()) {
+ matched[entryLen] = entry;
}
}
}
- vector<const DictEntry*> matchedEntries;
+ std::vector<const DictEntry*> matchedEntries;
for (auto i = matched.rbegin(); i != matched.rend(); i++) {
matchedEntries.push_back(i->second);
}
DictGroupPtr DictGroup::NewFromDict(const Dict& dict) {
TextDictPtr newDict = TextDict::NewFromDict(dict);
- return DictGroupPtr(new DictGroup(list<DictPtr>{newDict}));
+ return DictGroupPtr(new DictGroup(std::list<DictPtr>{newDict}));
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma once
+#include <list>
+
#include "Common.hpp"
#include "Dict.hpp"
*/
class OPENCC_EXPORT DictGroup : public Dict {
public:
- DictGroup(const list<DictPtr>& dicts);
+ DictGroup(const std::list<DictPtr>& dicts);
static DictGroupPtr NewFromDict(const Dict& dict);
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
- virtual vector<const DictEntry*> MatchAllPrefixes(const char* word,
- size_t len) const;
+ virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
+ size_t len) const;
virtual LexiconPtr GetLexicon() const;
- const list<DictPtr> GetDicts() const { return dicts; }
+ const std::list<DictPtr> GetDicts() const { return dicts; }
private:
const size_t keyMaxLength;
- const list<DictPtr> dicts;
+ const std::list<DictPtr> dicts;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
TEST_F(DictGroupTest, TaiwanPhraseGroupTest) {
const DictGroupPtr dictGroup(new DictGroup(
- list<DictPtr>{CreateDictForPhrases(), CreateTaiwanPhraseDict()}));
+ std::list<DictPtr>{CreateDictForPhrases(), CreateTaiwanPhraseDict()}));
{
const auto& entry = dictGroup->Dict::MatchPrefix(utf8("鼠标"));
EXPECT_EQ(utf8("鼠標"), entry.Get()->GetDefault());
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
DictPtr phrasesDict = CreateDictForPhrases();
DictPtr charactersDict = CreateDictForCharacters();
DictGroupPtr dictGroup(
- new DictGroup(list<DictPtr>{phrasesDict, charactersDict}));
+ new DictGroup(std::list<DictPtr>{phrasesDict, charactersDict}));
return dictGroup;
}
};
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
--- /dev/null
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+
+#include "Lexicon.hpp"
+namespace opencc {
+
+// Sorts the stored entries in place with std::sort, ordered by
+// DictEntry::UPtrLessThan.
+void Lexicon::Sort() {
+ std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan);
+}
+
+// Returns true when the stored entries are already ordered according to
+// DictEntry::UPtrLessThan, the same comparator Sort() uses.
+bool Lexicon::IsSorted() {
+ return std::is_sorted(entries.begin(), entries.end(),
+ DictEntry::UPtrLessThan);
+}
+
+} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
class OPENCC_EXPORT Lexicon {
public:
Lexicon() {}
- Lexicon(vector<std::unique_ptr<DictEntry>> entries_)
+ Lexicon(std::vector<std::unique_ptr<DictEntry>> entries_)
: entries(std::move(entries_)) {}
Lexicon(const Lexicon&) = delete;
Lexicon& operator=(const Lexicon&) = delete;
entries.push_back(std::move(entry));
}
- void Sort() {
- std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan);
- }
+ void Sort();
- bool IsSorted() {
- return std::is_sorted(entries.begin(), entries.end(),
- DictEntry::UPtrLessThan);
- }
+ bool IsSorted();
const DictEntry* At(size_t index) const { return entries.at(index).get(); }
size_t Length() const { return entries.size(); }
- vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
+ std::vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
return entries.begin();
}
- vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
+ std::vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
return entries.end();
}
private:
- vector<std::unique_ptr<DictEntry>> entries;
+ std::vector<std::unique_ptr<DictEntry>> entries;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "marisa.h"
+#include <algorithm>
+#include <cstring>
#include <unordered_map>
+#include "marisa.h"
+
#include "Lexicon.hpp"
#include "MarisaDict.hpp"
#include "SerializedValues.hpp"
}
}
-vector<const DictEntry*> MarisaDict::MatchAllPrefixes(const char* word,
- size_t len) const {
+std::vector<const DictEntry*> MarisaDict::MatchAllPrefixes(const char* word,
+ size_t len) const {
const marisa::Trie& trie = *internal->marisa;
marisa::Agent agent;
agent.set_query(word, (std::min)(maxLength, len));
- vector<const DictEntry*> matches;
+ std::vector<const DictEntry*> matches;
while (trie.common_prefix_search(agent)) {
matches.push_back(lexicon->At(agent.key().id()));
}
// Extract lexicon from built Marisa Trie, in order to get the order of keys.
marisa::Agent agent;
agent.set_query("");
- vector<std::unique_ptr<DictEntry>> entries;
+ std::vector<std::unique_ptr<DictEntry>> entries;
entries.resize(values_lexicon->Length());
size_t maxLength = 0;
while (dict->internal->marisa->predictive_search(agent)) {
// Extract lexicon from built Marisa Trie, in order to get the order of keys.
marisa::Agent agent;
agent.set_query("");
- vector<std::unique_ptr<DictEntry>> entries;
+ std::vector<std::unique_ptr<DictEntry>> entries;
entries.resize(thatLexicon->Length());
while (dict->internal->marisa->predictive_search(agent)) {
std::string key(agent.key().ptr(), agent.key().length());
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
- virtual vector<const DictEntry*> MatchAllPrefixes(const char* word,
- size_t len) const;
+ virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
+ size_t len) const;
virtual LexiconPtr GetLexicon() const;
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
: dict(MarisaDict::NewFromDict(*textDict)), fileName("dict.ocd2"){};
const MarisaDictPtr dict;
- const string fileName;
+ const std::string fileName;
};
TEST_F(MarisaDictTest, DictTest) { TestDict(dict); }
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
using namespace opencc;
-SegmentsPtr MaxMatchSegmentation::Segment(const string& text) const {
+SegmentsPtr MaxMatchSegmentation::Segment(const std::string& text) const {
SegmentsPtr segments(new Segments);
const char* segStart = text.c_str();
size_t segLength = 0;
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
virtual ~MaxMatchSegmentation() {}
- virtual SegmentsPtr Segment(const string& text) const;
+ virtual SegmentsPtr Segment(const std::string& text) const;
const DictPtr GetDict() const { return dict; }
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
TEST_F(MaxMatchSegmentationTest, Segment) {
const auto& segments = segmenter->Segment(utf8("太后的头发干燥"));
EXPECT_EQ(4, segments->Length());
- EXPECT_EQ(utf8("太后"), string(segments->At(0)));
- EXPECT_EQ(utf8("的"), string(segments->At(1)));
- EXPECT_EQ(utf8("头发"), string(segments->At(2)));
- EXPECT_EQ(utf8("干燥"), string(segments->At(3)));
+ EXPECT_EQ(utf8("太后"), std::string(segments->At(0)));
+ EXPECT_EQ(utf8("的"), std::string(segments->At(1)));
+ EXPECT_EQ(utf8("头发"), std::string(segments->At(2)));
+ EXPECT_EQ(utf8("干燥"), std::string(segments->At(3)));
}
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2015-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <algorithm>
#include <cmath>
#include <unordered_map>
#include "PhraseExtract.hpp"
+#ifdef _MSC_VER
+#pragma execution_character_set("utf-8")
+#endif
+
namespace opencc {
namespace internal {
bool ContainsPunctuation(const PhraseExtract::UTF8StringSlice8Bit& word) {
- static const vector<PhraseExtract::UTF8StringSlice8Bit> punctuations = {
+ static const std::vector<PhraseExtract::UTF8StringSlice8Bit> punctuations = {
" ", "\n", "\r", "\t", "-", ",", ".", "?", "!", "*", " ",
",", "。", "、", ";", ":", "?", "!", "…", "“", "”", "「",
"」", "—", "-", "(", ")", "《", "》", ".", "/", "\"};
marisa_trie.clear();
}
- const vector<ItemType>& Items() const { return items; }
+ const std::vector<ItemType>& Items() const { return items; }
void Build() {
BuildKeys();
void BuildTrie() {
std::unordered_map<std::string, int> key_item_id_map;
marisa::Keyset keyset;
- for (size_t i = 0; i < items.size(); i++) {
+ for (int i = 0; i < items.size(); i++) {
const auto& key = items[i].first;
key_item_id_map[key.ToString()] = i;
keyset.push_back(key.CString(), key.ByteLength());
std::unordered_map<UTF8StringSlice8Bit, PhraseExtract::Signals,
UTF8StringSlice8Bit::Hasher>
dict;
- vector<ItemType> items;
+ std::vector<ItemType> items;
marisa::Trie marisa_trie;
std::vector<int> marisa_id_item_map;
};
template <bool SUFFIX>
void CalculatePrefixSuffixEntropy(
- const vector<PhraseExtract::UTF8StringSlice8Bit>& presuffixes,
+ const std::vector<PhraseExtract::UTF8StringSlice8Bit>& presuffixes,
const PhraseExtract::LengthType setLength,
const PhraseExtract::LengthType wordMinLength,
const PhraseExtract::LengthType wordMaxLength,
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma once
+#include <functional>
#include <unordered_map>
#include "Common.hpp"
virtual ~PhraseExtract();
- void Extract(const string& text) {
+ void Extract(const std::string& text) {
SetFullText(text);
ExtractSuffixes();
CalculateFrequency();
SelectWords();
}
- void SetFullText(const string& fullText) {
+ void SetFullText(const std::string& fullText) {
utf8FullText = UTF8StringSlice(fullText.c_str());
}
postCalculationFilter = filter;
}
- void ReleaseSuffixes() { vector<UTF8StringSlice8Bit>().swap(suffixes); }
+ void ReleaseSuffixes() { std::vector<UTF8StringSlice8Bit>().swap(suffixes); }
- void ReleasePrefixes() { vector<UTF8StringSlice8Bit>().swap(prefixes); }
+ void ReleasePrefixes() { std::vector<UTF8StringSlice8Bit>().swap(prefixes); }
- const vector<UTF8StringSlice8Bit>& Words() const { return words; }
+ const std::vector<UTF8StringSlice8Bit>& Words() const { return words; }
- const vector<UTF8StringSlice8Bit>& WordCandidates() const {
+ const std::vector<UTF8StringSlice8Bit>& WordCandidates() const {
return wordCandidates;
}
UTF8StringSlice utf8FullText;
size_t totalOccurrence;
double logTotalOccurrence;
- vector<UTF8StringSlice8Bit> prefixes;
- vector<UTF8StringSlice8Bit> suffixes;
- vector<UTF8StringSlice8Bit> wordCandidates;
- vector<UTF8StringSlice8Bit> words;
+ std::vector<UTF8StringSlice8Bit> prefixes;
+ std::vector<UTF8StringSlice8Bit> suffixes;
+ std::vector<UTF8StringSlice8Bit> wordCandidates;
+ std::vector<UTF8StringSlice8Bit> words;
DictType* signals;
friend class PhraseExtractTest;
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
: siShi(utf8("四是四十是十十四是十四四十是四十")),
punctuation(utf8("一.二.三")) {}
- const vector<UTF8StringSlice8Bit>& Suffixes() const {
+ const std::vector<UTF8StringSlice8Bit>& Suffixes() const {
return phraseExtract.suffixes;
}
- const vector<UTF8StringSlice8Bit>& Prefixes() const {
+ const std::vector<UTF8StringSlice8Bit>& Prefixes() const {
return phraseExtract.prefixes;
}
PhraseExtract phraseExtract;
- const string siShi;
- const string punctuation;
+ const std::string siShi;
+ const std::string punctuation;
};
TEST_F(PhraseExtractTest, ExtractSuffixes) {
phraseExtract.SetFullText(siShi);
phraseExtract.ExtractSuffixes();
EXPECT_EQ(
- vector<UTF8StringSlice8Bit>(
+ std::vector<UTF8StringSlice8Bit>(
{"十", "十十四是", "十四四十", "十四是十", "十是十十", "十是四十",
"四十", "四十是十", "四十是四", "四四十是", "四是十四", "四是四十",
"是十十四", "是十四四", "是四十", "是四十是"}),
phraseExtract.SetFullText(siShi);
phraseExtract.ExtractPrefixes();
EXPECT_EQ(
- vector<UTF8StringSlice8Bit>(
+ std::vector<UTF8StringSlice8Bit>(
{"十是十十", "十四四十", "十是四十", "四是四十", "四十是十",
"十四是十", "四", "是十十四", "四是十四", "是十四四", "四十是四",
"四是四", "四四十是", "是四十是", "四是", "十十四是"}),
phraseExtract.SetWordMaxLength(3);
phraseExtract.SetFullText(siShi);
phraseExtract.ExtractWordCandidates();
- EXPECT_EQ(vector<UTF8StringSlice8Bit>(
+ EXPECT_EQ(std::vector<UTF8StringSlice8Bit>(
{"十", "四", "是", "四十", "十四", "十是",
"四十是", "四是", "是十", "是四", "是四十", "十十",
"十十四", "十四四", "十四是", "十是十", "十是四", "四四",
return phraseExtract.Frequency(word) == 1;
});
phraseExtract.SelectWords();
- EXPECT_EQ(
- vector<UTF8StringSlice8Bit>({"十", "四", "是", "四十", "十四", "十是",
- "四十是", "四是", "是十", "是四", "是四十"}),
- phraseExtract.Words());
+ EXPECT_EQ(std::vector<UTF8StringSlice8Bit>({"十", "四", "是", "四十", "十四",
+ "十是", "四十是", "四是", "是十",
+ "是四", "是四十"}),
+ phraseExtract.Words());
}
TEST_F(PhraseExtractTest, Punctuation) {
phraseExtract.SetFullText(punctuation);
phraseExtract.ExtractPrefixes();
EXPECT_EQ(
- vector<UTF8StringSlice8Bit>({"一.", ".二.", "一", "二.三", "一.二"}),
+ std::vector<UTF8StringSlice8Bit>({"一.", ".二.", "一", "二.三", "一.二"}),
Prefixes());
}
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
class OPENCC_EXPORT Segmentation {
public:
- virtual SegmentsPtr Segment(const string& text) const = 0;
+ virtual SegmentsPtr Segment(const std::string& text) const = 0;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#pragma once
+#include <sstream>
+
#include "Common.hpp"
namespace opencc {
Segments() {}
Segments(std::initializer_list<const char*> initList) {
- for (const string& item : initList) {
+ for (const std::string& item : initList) {
AddSegment(item);
}
}
- Segments(std::initializer_list<string> initList) {
- for (const string& item : initList) {
+ Segments(std::initializer_list<std::string> initList) {
+ for (const std::string& item : initList) {
AddSegment(item);
}
}
unmanaged.push_back(unmanagedString);
}
- void AddSegment(const string& str) {
+ void AddSegment(const std::string& str) {
indexes.push_back(std::make_pair(managed.size(), true));
managed.push_back(str);
}
iterator end() const { return iterator(this, indexes.size()); }
- string ToString() const {
+ std::string ToString() const {
// TODO implement a nested structure to reduce concatenation,
// like a purely functional differential list
std::ostringstream buffer;
private:
Segments(const Segments&) {}
- vector<const char*> unmanaged;
- vector<string> managed;
+ std::vector<const char*> unmanaged;
+ std::vector<std::string> managed;
// index, managed
- vector<std::pair<size_t, bool>> indexes;
+ std::vector<std::pair<size_t, bool>> indexes;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/**
* Serializes the dictionary and writes in to a file.
*/
- virtual void SerializeToFile(const string& fileName) const {
+ virtual void SerializeToFile(const std::string& fileName) const {
FILE* fp = fopen(fileName.c_str(), "wb");
if (fp == NULL) {
throw FileNotWritable(fileName);
}
template <typename DICT>
- static bool TryLoadFromFile(const string& fileName,
+ static bool TryLoadFromFile(const std::string& fileName,
std::shared_ptr<DICT>* dict) {
FILE* fp =
#ifdef _MSC_VER
}
template <typename DICT>
- static std::shared_ptr<DICT> NewFromFile(const string& fileName) {
+ static std::shared_ptr<DICT> NewFromFile(const std::string& fileName) {
std::shared_ptr<DICT> dict;
if (!TryLoadFromFile<DICT>(fileName, &dict)) {
throw FileNotFound(fileName);
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "SerializedValues.hpp"
+#include <cassert>
+#include <cstring>
+
#include "Lexicon.hpp"
+#include "SerializedValues.hpp"
using namespace opencc;
size_t SerializedValues::KeyMaxLength() const { return 0; }
void SerializedValues::SerializeToFile(FILE* fp) const {
- string valueBuf;
- vector<uint16_t> valueBytes;
+ std::string valueBuf;
+ std::vector<uint16_t> valueBytes;
uint32_t valueTotalLength = 0;
ConstructBuffer(&valueBuf, &valueBytes, &valueTotalLength);
// Number of items
// Values
uint32_t valueTotalLength = ReadInteger<uint32_t>(fp);
- string valueBuffer;
+ std::string valueBuffer;
valueBuffer.resize(valueTotalLength);
size_t unitsRead = fread(const_cast<char*>(valueBuffer.c_str()), sizeof(char),
valueTotalLength, fp);
// Number of values
uint16_t numValues = ReadInteger<uint16_t>(fp);
// Value offset
- vector<std::string> values;
+ std::vector<std::string> values;
for (uint16_t j = 0; j < numValues; j++) {
const char* value = pValueBuffer;
uint16_t numValueBytes = ReadInteger<uint16_t>(fp);
return dict;
}
-void SerializedValues::ConstructBuffer(string* valueBuffer,
- vector<uint16_t>* valueBytes,
+void SerializedValues::ConstructBuffer(std::string* valueBuffer,
+ std::vector<uint16_t>* valueBytes,
uint32_t* valueTotalLength) const {
*valueTotalLength = 0;
// Calculate total length.
for (const std::unique_ptr<DictEntry>& entry : *lexicon) {
assert(entry->NumValues() != 0);
for (const auto& value : entry->Values()) {
- *valueTotalLength += value.length() + 1;
+ *valueTotalLength += static_cast<uint32_t>(value.length()) + 1;
}
}
// Write values to the buffer.
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
private:
LexiconPtr lexicon;
- void ConstructBuffer(string* valueBuffer, vector<uint16_t>* valueBytes,
+ void ConstructBuffer(std::string* valueBuffer,
+ std::vector<uint16_t>* valueBytes,
uint32_t* valueTotalLength) const;
};
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
fileName("dict.bin"){};
const std::shared_ptr<SerializedValues> binDict;
- const string fileName;
+ const std::string fileName;
};
TEST_F(SerializedValuesTest, Serialization) {
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
}
std::string SimpleConverter::Convert(const char* input) const {
- return Convert(string(input));
+ return Convert(std::string(input));
}
std::string SimpleConverter::Convert(const char* input, size_t length) const {
if (length == static_cast<size_t>(-1)) {
- return Convert(string(input));
+ return Convert(std::string(input));
} else {
return Convert(UTF8Util::FromSubstr(input, length));
}
if (length == static_cast<size_t>(-1)) {
return Convert(input, output);
} else {
- string trimmed = UTF8Util::FromSubstr(input, length);
+ std::string trimmed = UTF8Util::FromSubstr(input, length);
return Convert(trimmed.c_str(), output);
}
}
-static string cError;
+static std::string cError;
opencc_t opencc_open_internal(const char* configFileName) {
try {
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/**
* Converts a text
- * @param input A C-Style string (terminated by '\0') to be converted.
+ * @param input A C-Style string (terminated by '\0') to be converted.
*/
std::string Convert(const char* input) const;
/**
* Converts a text
- * @param input A C-Style string limited by a given length to be converted.
- * @param length Maximal length in byte of the input string.
+ * @param input A C-Style string limited by a given length to be
+ * converted.
+ * @param length Maximal length in byte of the input string.
*/
std::string Convert(const char* input, size_t length) const;
/**
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficent space.
- * @param input A C-Style string (terminated by '\0') to be converted.
+ * @param input A C-Style string (terminated by '\0') to be converted.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
/**
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficent space.
- * @param input A C-Style string limited by a given length to be converted.
- * @param length Maximal length in byte of the input string.
+ * @param input A C-Style string limited by a given length to be
+ * converted.
+ * @param length Maximal length in byte of the input string.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
void TestConverter(const std::string& config) const {
const SimpleConverter converter(config);
- const string& converted =
+ const std::string& converted =
converter.Convert(utf8("燕燕于飞差池其羽之子于归远送于野"));
EXPECT_EQ(utf8("燕燕于飛差池其羽之子于歸遠送於野"), converted);
}
}
TEST_F(SimpleConverterTest, CInterface) {
- const string& text = utf8("燕燕于飞差池其羽之子于归远送于野");
- const string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野");
+ const std::string& text = utf8("燕燕于飞差池其羽之子于归远送于野");
+ const std::string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野");
{
opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str());
char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1);
EXPECT_EQ(0, opencc_close(od));
}
{
- string path = "/opencc/no/such/file/or/directory";
+ std::string path = "/opencc/no/such/file/or/directory";
opencc_t od = opencc_open(path.c_str());
EXPECT_EQ(reinterpret_cast<opencc_t>(-1), od);
EXPECT_EQ(path + " not found or not accessible.", opencc_error());
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
namespace opencc {
-#if defined(_MSC_VER) && _MSC_VER > 1310
-// Visual C++ 2005 and later require the source files in UTF-8, and all strings
-// to be encoded as wchar_t otherwise the strings will be converted into the
-// local multibyte encoding and cause errors. To use a wchar_t as UTF-8, these
-// strings then need to be convert back to UTF-8. This function is just a rough
-// example of how to do this.
-#include <Windows.h>
-#define utf8(str) ConvertToUTF8(L##str)
-std::string ConvertToUTF8(const wchar_t* pStr) {
- static char szBuf[1024];
- WideCharToMultiByte(CP_UTF8, 0, pStr, -1, szBuf, sizeof(szBuf), NULL, NULL);
- return szBuf;
-}
-
-#else // if defined(_MSC_VER) && _MSC_VER > 1310
-// Visual C++ 2003 and gcc will use the string literals as is, so the files
-// should be saved as UTF-8. gcc requires the files to not have a UTF-8 BOM.
#define utf8(str) std::string(str)
-#endif // if defined(_MSC_VER) && _MSC_VER > 1310
} // namespace opencc
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "TextDict.hpp"
+#include <algorithm>
+#include <cassert>
+
#include "Lexicon.hpp"
+#include "TextDict.hpp"
using namespace opencc;
size_t length;
const char* pbuff = UTF8Util::FindNextInline(buff, '\t');
if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) {
- throw InvalidTextDictionary("Tabular not found " + string(buff), lineNum);
+ throw InvalidTextDictionary("Tabular not found " + std::string(buff),
+ lineNum);
}
length = static_cast<size_t>(pbuff - buff);
- string key = UTF8Util::FromSubstr(buff, length);
- vector<string> values;
+ std::string key = UTF8Util::FromSubstr(buff, length);
+ std::vector<std::string> values;
while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) {
buff = pbuff = UTF8Util::NextChar(pbuff);
pbuff = UTF8Util::FindNextInline(buff, ' ');
length = static_cast<size_t>(pbuff - buff);
- const string& value = UTF8Util::FromSubstr(buff, length);
+ const std::string& value = UTF8Util::FromSubstr(buff, length);
values.push_back(value);
}
if (values.size() == 0) {
size_t TextDict::KeyMaxLength() const { return maxLength; }
Optional<const DictEntry*> TextDict::Match(const char* word, size_t len) const {
- std::unique_ptr<DictEntry> entry(new NoValueDictEntry(word));
+ std::unique_ptr<DictEntry> entry(
+ new NoValueDictEntry(std::string(word, len)));
const auto& found = std::lower_bound(lexicon->begin(), lexicon->end(), entry,
DictEntry::UPtrLessThan);
if ((found != lexicon->end()) && ((*found)->Key() == entry->Key())) {
/*
* Open Chinese Convert
*
- * Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
protected:
TextDictTest() : fileName("dict.txt"){};
- const string fileName;
+ const std::string fileName;
};
TEST_F(TextDictTest, DictTest) { TestDict(textDict); }
/*
* Open Chinese Convert
*
- * Copyright 2015-2020 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
DictPtr CreateDictForCharacters() const {
LexiconPtr lexicon(new Lexicon);
- lexicon->Add(DictEntryFactory::New(utf8("后"),
- vector<string>{utf8("后"), utf8("後")}));
- lexicon->Add(DictEntryFactory::New(utf8("发"),
- vector<string>{utf8("發"), utf8("髮")}));
lexicon->Add(DictEntryFactory::New(
- utf8("干"), vector<string>{utf8("幹"), utf8("乾"), utf8("干")}));
- lexicon->Add(DictEntryFactory::New(utf8("里"),
- vector<string>{utf8("裏"), utf8("里")}));
+ utf8("后"), std::vector<std::string>{utf8("后"), utf8("後")}));
+ lexicon->Add(DictEntryFactory::New(
+ utf8("发"), std::vector<std::string>{utf8("發"), utf8("髮")}));
+ lexicon->Add(DictEntryFactory::New(
+ utf8("干"),
+ std::vector<std::string>{utf8("幹"), utf8("乾"), utf8("干")}));
+ lexicon->Add(DictEntryFactory::New(
+ utf8("里"), std::vector<std::string>{utf8("裏"), utf8("里")}));
lexicon->Sort();
return TextDictPtr(new TextDict(lexicon));
}
DictPtr CreateDictForTaiwanVariants() const {
LexiconPtr lexicon(new Lexicon);
lexicon->Add(DictEntryFactory::New(utf8("裏"), utf8("裡")));
- TextDictPtr textDict(new TextDict(lexicon));
- return textDict;
+ return TextDictPtr(new TextDict(lexicon));
}
DictPtr CreateTaiwanPhraseDict() const {
EXPECT_EQ(utf8("BYVoid"), entry.Get()->Key());
EXPECT_EQ(utf8("byv"), entry.Get()->GetDefault());
- entry = dict->MatchPrefix("清華大學");
+ entry = dict->MatchPrefix(utf8("清華大學"));
EXPECT_TRUE(!entry.IsNull());
EXPECT_EQ(utf8("清華大學"), entry.Get()->Key());
EXPECT_EQ(utf8("TsinghuaUniversity"), entry.Get()->GetDefault());
}
void TestMatchAllPrefixes(const DictPtr& dict) const {
- const vector<const DictEntry*> matches =
+ const std::vector<const DictEntry*> matches =
dict->MatchAllPrefixes(utf8("清華大學計算機系"));
EXPECT_EQ(3, matches.size());
EXPECT_EQ(utf8("清華大學"), matches.at(0)->Key());
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <cstring>
+
#include "Common.hpp"
#include "UTF8Util.hpp"
}
}
- string ToString() const { return string(str, str + byteLength); }
+ std::string ToString() const { return std::string(str, str + byteLength); }
const char* CString() const { return str; }
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
UTF8StringSliceTest()
: text("天行健,君子以自強不息。地勢坤,君子以厚德載物。"), empty(""){};
- const UTF8StringSlice text;
+ UTF8StringSlice text;
const UTF8StringSlice empty;
};
}
TEST_F(UTF8StringSliceTest, MoveRight) {
- UTF8StringSlice text = this->text;
text.MoveRight();
EXPECT_EQ(UTF8StringSlice("行健,君子以自強不息。地勢坤,君子以厚德載物。"),
text);
}
TEST_F(UTF8StringSliceTest, MoveLeft) {
- UTF8StringSlice text = this->text;
text.MoveLeft();
EXPECT_EQ(UTF8StringSlice("天行健,君子以自強不息。地勢坤,君子以厚德載物"),
text);
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/*
* Open Chinese Convert
*
- * Copyright 2013 BYVoid <byvoid@byvoid.com>
+ * Copyright 2013 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#undef NOMINMAX
#endif // _MSC_VER
+#include <cstring>
+
#include "Common.hpp"
+#include "Exception.hpp"
namespace opencc {
/**
- * UTF8 string utilities
+ * UTF8 string utilities
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT UTF8Util {
}
/**
- * Returns the UTF8 length of a valid UTF8 string.
+ * Returns the UTF8 length of a valid UTF8 string.
*/
static size_t Length(const char* str) {
size_t length = 0;
}
/**
- * Copies a substring with given length to a new std::string.
+ * Copies a substring with given length to a new std::string.
*/
- static string FromSubstr(const char* str, size_t length) {
- string newStr;
+ static std::string FromSubstr(const char* str, size_t length) {
+ std::string newStr;
newStr.resize(length);
strncpy(const_cast<char*>(newStr.c_str()), str, length);
return newStr;
}
/**
- * Returns true if the given string is longer or as long as the given length.
+ * Returns true if the given string is longer or as long as the given
+ * length.
*/
static bool NotShorterThan(const char* str, size_t byteLength) {
while (byteLength > 0) {
}
/**
- * Truncates a string with a maximal length in byte.
+ * Truncates a string with a maximal length in byte.
* No UTF8 character will be broken.
*/
- static string TruncateUTF8(const char* str, size_t maxByteLength) {
- string wordTrunc;
+ static std::string TruncateUTF8(const char* str, size_t maxByteLength) {
+ std::string wordTrunc;
if (NotShorterThan(str, maxByteLength)) {
size_t len = 0;
const char* pStr = str;
}
/**
- * Replaces all patterns in a string in place.
+ * Replaces all patterns in a string in place.
*/
- static void ReplaceAll(string& str, const char* from, const char* to) {
- string::size_type pos = 0;
- string::size_type fromLen = strlen(from);
- string::size_type toLen = strlen(to);
- while ((pos = str.find(from, pos)) != string::npos) {
+ static void ReplaceAll(std::string& str, const char* from, const char* to) {
+ std::string::size_type pos = 0;
+ std::string::size_type fromLen = strlen(from);
+ std::string::size_type toLen = strlen(to);
+ while ((pos = str.find(from, pos)) != std::string::npos) {
str.replace(pos, fromLen, to);
pos += toLen;
}
}
/**
- * Joins a string vector in to a string with a separator.
+ * Joins a string vector into a string with a separator.
*/
- static string Join(const vector<string>& strings, const string& separator) {
+ static std::string Join(const std::vector<std::string>& strings,
+ const std::string& separator) {
std::ostringstream buffer;
bool first = true;
for (const auto& str : strings) {
}
/**
- * Joins a string vector in to a string.
+ * Joins a string vector into a string.
*/
- static string Join(const vector<string>& strings) {
+ static std::string Join(const std::vector<std::string>& strings) {
std::ostringstream buffer;
for (const auto& str : strings) {
buffer << str;
}
static void GetByteMap(const char* str, const size_t utf8Length,
- vector<size_t>* byteMap) {
+ std::vector<size_t>* byteMap) {
if (byteMap->size() < utf8Length) {
byteMap->resize(utf8Length);
}
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
}
TEST_F(UTF8UtilTest, GetByteMap) {
- vector<size_t> byteMap;
+ std::vector<size_t> byteMap;
UTF8Util::GetByteMap(text, 6, &byteMap);
- EXPECT_EQ(vector<size_t>({0, 3, 6, 9, 12, 16}), byteMap);
+ EXPECT_EQ(std::vector<size_t>({0, 3, 6, 9, 12, 16}), byteMap);
}
} // namespace opencc
add_executable(performance Performance.cpp)
target_link_libraries(performance benchmark libopencc)
-add_test(performance performance)
+add_test(BenchmarkTest performance)
+
+if (WIN32)
+ add_custom_target(
+ copy_benchmark
+ ${CMAKE_COMMAND} -E copy $<TARGET_FILE:benchmark> ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Copy benchmark"
+ )
+ add_custom_target(
+ copy_opencc
+ ${CMAKE_COMMAND} -E copy $<TARGET_FILE:libopencc> ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Copy opencc"
+ )
+ add_dependencies(performance copy_benchmark copy_opencc)
+endif()
#include <iostream>
#include <memory>
#include <streambuf>
+
+#ifdef _MSC_VER
+#include <direct.h>
+#else
#include <unistd.h>
+#endif
#include "SimpleConverter.hpp"
#include "TestUtilsUTF8.hpp"
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
OPENCC_EXPORT int opencc_close(opencc_t opencc);
/**
- * Converts UTF-8 string
+ * Converts UTF-8 string
*
* @param opencc The opencc description pointer.
- * @param input The UTF-8 encoded string.
+ * @param input The UTF-8 encoded string.
* @param length The maximum length in byte to convert. If length is (size_t)-1,
- * the whole string (terminated by '\0') will be converted.
+ * the whole string (terminated by '\0') will be converted.
* @param output The buffer to store converted text. You MUST make sure this
* buffer has sufficient space.
*
- * @return The length of converted string or (size_t)-1 on error.
+ * @return The length of converted string or (size_t)-1 on error.
*
* @ingroup opencc_c_api
*/
size_t length, char* output);
/**
- * Converts UTF-8 string
- * This function returns an allocated C-Style string, which stores
- * the converted string.
+ * Converts UTF-8 string
+ * This function returns an allocated C-style string, which stores
+ * the converted string.
* You MUST call opencc_convert_utf8_free() to release allocated memory.
*
* @param opencc The opencc description pointer.
- * @param input The UTF-8 encoded string.
+ * @param input The UTF-8 encoded string.
* @param length The maximum length in byte to convert. If length is (size_t)-1,
- * the whole string (terminated by '\0') will be converted.
+ * the whole string (terminated by '\0') will be converted.
*
- * @return The newly allocated UTF-8 string that stores text converted,
- * or NULL on error.
+ * @return The newly allocated UTF-8 string that stores the converted
+ * text, or NULL on error.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, const char* input,
/**
* Releases allocated buffer by opencc_convert_utf8
*
- * @param str Pointer to the allocated string buffer by opencc_convert_utf8.
+ * @param str Pointer to the string buffer allocated by
+ * opencc_convert_utf8.
*
* @ingroup opencc_c_api
*/
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <fstream>
+
#include "CmdLineOutput.hpp"
#include "Config.hpp"
#include "Converter.hpp"
using namespace opencc;
-Optional<string> inputFileName = Optional<string>::Null();
-Optional<string> outputFileName = Optional<string>::Null();
-string configFileName;
+Optional<std::string> inputFileName = Optional<std::string>::Null();
+Optional<std::string> outputFileName = Optional<std::string>::Null();
+std::string configFileName;
bool noFlush;
Config config;
ConverterPtr converter;
} else {
isFirstLine = false;
}
- string line;
+ std::string line;
std::getline(inputStream, line);
- const string& converted = converter->Convert(line);
+ const std::string& converted = converter->Convert(line);
fputs(converted.c_str(), fout);
if (!noFlush) {
// Flush every line if the output stream is stdout.
fclose(fout);
}
-void Convert(string inputFileName) {
+void Convert(std::string fileName) {
const int BUFFER_SIZE = 1024 * 1024;
static bool bufferInitialized = false;
- static string buffer;
+ static std::string buffer;
static char* bufferBegin;
static const char* bufferEnd;
static char* bufferPtr;
}
bool needToRemove = false;
- if (!outputFileName.IsNull() && inputFileName == outputFileName.Get()) {
+ if (!outputFileName.IsNull() && fileName == outputFileName.Get()) {
// Special case: input == output
- const string tempFileName = std::tmpnam(nullptr);
- std::ifstream src(inputFileName, std::ios::binary);
+ const std::string tempFileName = std::tmpnam(nullptr);
+ std::ifstream src(fileName, std::ios::binary);
std::ofstream dst(tempFileName, std::ios::binary);
dst << src.rdbuf();
dst.close();
- inputFileName = tempFileName;
+ fileName = tempFileName;
needToRemove = true;
}
- FILE* fin = fopen(inputFileName.c_str(), "r");
+ FILE* fin = fopen(fileName.c_str(), "r");
if (!fin) {
- throw FileNotFound(inputFileName);
+ throw FileNotFound(fileName);
}
FILE* fout = GetOutputStream();
while (!feof(fin)) {
size_t length = fread(bufferPtr, sizeof(char), bufferSizeAvailble, fin);
bufferPtr[length] = '\0';
size_t remainingLength = 0;
- string remainingTemp;
+ std::string remainingTemp;
if (length == bufferSizeAvailble) {
// fread may breaks UTF8 character
// Find the end of last character
}
}
// Perform conversion
- const string& converted = converter->Convert(buffer);
+ const std::string& converted = converter->Convert(buffer);
fputs(converted.c_str(), fout);
if (!noFlush) {
// Flush every line if the output stream is stdout.
fclose(fout);
if (needToRemove) {
// Remove temporary file.
- std::remove(inputFileName.c_str());
+ std::remove(fileName.c_str());
}
}
CmdLineOutput cmdLineOutput;
cmd.setOutput(&cmdLineOutput);
- TCLAP::ValueArg<string> configArg(
+ TCLAP::ValueArg<std::string> configArg(
"c", "config", "Configuration file", false /* required */,
"s2t.json" /* default */, "file" /* type */, cmd);
- TCLAP::ValueArg<string> outputArg(
+ TCLAP::ValueArg<std::string> outputArg(
"o", "output", "Write converted text to <file>.", false /* required */,
"" /* default */, "file" /* type */, cmd);
- TCLAP::ValueArg<string> inputArg(
+ TCLAP::ValueArg<std::string> inputArg(
"i", "input", "Read original text from <file>.", false /* required */,
"" /* default */, "file" /* type */, cmd);
TCLAP::ValueArg<bool> noFlushArg(
configFileName = configArg.getValue();
noFlush = noFlushArg.getValue();
if (inputArg.isSet()) {
- inputFileName = Optional<string>(inputArg.getValue());
+ inputFileName = Optional<std::string>(inputArg.getValue());
}
if (outputArg.isSet()) {
- outputFileName = Optional<string>(outputArg.getValue());
+ outputFileName = Optional<std::string>(outputArg.getValue());
noFlush = true;
}
converter = config.NewFromFile(configFileName);
/*
* Open Chinese Convert
*
- * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "DictConverter.hpp"
#include "CmdLineOutput.hpp"
+#include "Exception.hpp"
using namespace opencc;
CmdLineOutput cmdLineOutput;
cmd.setOutput(&cmdLineOutput);
- vector<string> dictFormats{"text", "ocd2", "ocd"};
- TCLAP::ValuesConstraint<string> allowedVals(dictFormats);
+ std::vector<std::string> dictFormats{"text", "ocd2", "ocd"};
+ TCLAP::ValuesConstraint<std::string> allowedVals(dictFormats);
- TCLAP::ValueArg<string> toArg("t", "to", "Output format",
- true /* required */, "" /* default */,
- &allowedVals /* type */, cmd);
- TCLAP::ValueArg<string> fromArg("f", "from", "Input format",
- true /* required */, "" /* default */,
- &allowedVals /* type */, cmd);
- TCLAP::ValueArg<string> outputArg(
+ TCLAP::ValueArg<std::string> toArg("t", "to", "Output format",
+ true /* required */, "" /* default */,
+ &allowedVals /* type */, cmd);
+ TCLAP::ValueArg<std::string> fromArg("f", "from", "Input format",
+ true /* required */, "" /* default */,
+ &allowedVals /* type */, cmd);
+ TCLAP::ValueArg<std::string> outputArg(
"o", "output", "Path to output dictionary", true /* required */,
"" /* default */, "file" /* type */, cmd);
- TCLAP::ValueArg<string> inputArg("i", "input", "Path to input dictionary",
- true /* required */, "" /* default */,
- "file" /* type */, cmd);
+ TCLAP::ValueArg<std::string> inputArg(
+ "i", "input", "Path to input dictionary", true /* required */,
+ "" /* default */, "file" /* type */, cmd);
cmd.parse(argc, argv);
ConvertDictionary(inputArg.getValue(), outputArg.getValue(),
fromArg.getValue(), toArg.getValue());
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "PhraseExtract.hpp"
+#include <fstream>
+
#include "CmdLineOutput.hpp"
+#include "PhraseExtract.hpp"
using opencc::Exception;
using opencc::PhraseExtract;
using opencc::UTF8StringSlice;
-void Extract(const vector<string>& inputFiles, const string& outputFile) {
+void Extract(const std::vector<std::string>& inputFiles,
+ const std::string& outputFile) {
std::ostringstream buffer;
for (const auto& inputFile : inputFiles) {
std::ifstream ifs(inputFile);
- const string contents((std::istreambuf_iterator<char>(ifs)),
- (std::istreambuf_iterator<char>()));
+ const std::string contents((std::istreambuf_iterator<char>(ifs)),
+ (std::istreambuf_iterator<char>()));
buffer << contents;
}
- const string& text = buffer.str();
+ const std::string& text = buffer.str();
PhraseExtract extractor;
extractor.SetWordMaxLength(2);
extractor.SetPrefixSetLength(1);
VERSION);
CmdLineOutput cmdLineOutput;
cmd.setOutput(&cmdLineOutput);
- TCLAP::UnlabeledMultiArg<string> fileNames("fileName", "Input files",
- true /* required */, "files");
+ TCLAP::UnlabeledMultiArg<std::string> fileNames(
+ "fileName", "Input files", true /* required */, "files");
cmd.add(fileNames);
- TCLAP::ValueArg<string> outputArg("o", "output", "Output file",
- true /* required */, "" /* default */,
- "file" /* type */, cmd);
+ TCLAP::ValueArg<std::string> outputArg(
+ "o", "output", "Output file", true /* required */, "" /* default */,
+ "file" /* type */, cmd);
cmd.parse(argc, argv);
Extract(fileNames.getValue(), outputArg.getValue());
} catch (TCLAP::ArgException& e) {
--- /dev/null
+cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -DENABLE_GTEST:BOOL=ON -DCMAKE_BUILD_TYPE=Debug
+cmake --build build --config Debug --target install
+cd build
+ctest --verbose -C Debug
endforeach (CONFIG_TEST_FILE)
if (ENABLE_GTEST)
+ if (WIN32)
+ add_custom_target(
+ copy_gtest_to_test
+ ${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest> ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Copy gtest"
+ )
+ add_custom_target(
+ copy_gtest_main_to_test
+ ${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest_main> ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Copy gtest_main"
+ )
+ endif()
+
include_directories(../deps/gtest-1.7.0/include)
set(UNITTESTS
CommandLineConvertTest
add_executable(${UNITTEST} ${UNITTEST}.cpp)
target_link_libraries(${UNITTEST} gtest gtest_main libopencc)
add_test(${UNITTEST} ${UNITTEST})
+ if (WIN32)
+ add_dependencies(${UNITTEST} copy_gtest_to_test copy_gtest_main_to_test)
+ endif()
endforeach(UNITTEST)
endif()
/*
* Open Chinese Convert
*
- * Copyright 2015 BYVoid <byvoid@byvoid.com>
+ * Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
+#include <fstream>
+
#include "Common.hpp"
#include "gtest/gtest.h"
virtual void TearDown() { ASSERT_EQ(0, chdir(originalWorkingDirectory)); }
- string GetFileContents(const string& fileName) const {
+ std::string GetFileContents(const std::string& fileName) const {
std::ifstream fs(fileName);
EXPECT_TRUE(fs.is_open());
- const string content((std::istreambuf_iterator<char>(fs)),
- (std::istreambuf_iterator<char>()));
+ const std::string content((std::istreambuf_iterator<char>(fs)),
+ (std::istreambuf_iterator<char>()));
fs.close();
return content;
}
}
const char* OpenccCommand() const {
+#ifndef _MSC_VER
return PROJECT_BINARY_DIR "/src/tools/opencc";
+#else
+#ifdef NDEBUG
+ return PROJECT_BINARY_DIR "/src/tools/Release/opencc.exe";
+#else
+ return PROJECT_BINARY_DIR "/src/tools/Debug/opencc.exe";
+#endif
+#endif
}
const char* InputDirectory() const {
return CMAKE_SOURCE_DIR "/data/config/";
}
- string OutputFile(const char* config) const {
- return string(OutputDirectory()) + config + ".out";
+ std::string OutputFile(const char* config) const {
+ return std::string(OutputDirectory()) + config + ".out";
}
- string AnswerFile(const char* config) const {
- return string(AnswerDirectory()) + config + ".ans";
+ std::string AnswerFile(const char* config) const {
+ return std::string(AnswerDirectory()) + config + ".ans";
}
- string TestCommand(const char* config) const {
- return OpenccCommand() + string("") + " -i " + InputDirectory() + config +
- ".in" + " -o " + OutputFile(config) + " -c " +
+ std::string TestCommand(const char* config) const {
+ return OpenccCommand() + std::string("") + " -i " + InputDirectory() +
+ config + ".in" + " -o " + OutputFile(config) + " -c " +
ConfigurationDirectory() + config + ".json";
}
TEST_P(ConfigurationTest, Convert) {
const char* config = GetParam();
ASSERT_EQ(0, system(TestCommand(config).c_str()));
- const string& output = GetFileContents(OutputFile(config));
- const string& answer = GetFileContents(AnswerFile(config));
+ const std::string& output = GetFileContents(OutputFile(config));
+ const std::string& answer = GetFileContents(AnswerFile(config));
ASSERT_EQ(answer, output);
}